# -*- coding: utf-8 -*-
"""Untitled241.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1wYnl3jLdIqgv36Wg5T4P7cv0KBy0z4uW
"""

# Required libraries

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Read the training CSV from the Colab filesystem into a DataFrame
data_path = '/content/Random Forest Training Dataset Dawlish.csv'
data = pd.read_csv(filepath_or_buffer=data_path)

# Separate the target column from the feature columns.
# NOTE: the column label ends with a space; the lookup below must match the
# label in the loaded frame exactly.
y = data['Wave Overtopping ']
X = data.drop(columns='Wave Overtopping ')

# Hold out 25% of the rows for testing; random_state is fixed at 1
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1, test_size=0.25)

# Build the DMatrix inputs that xgb.train / Booster.predict consume
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

# Booster settings: squared-error regression objective, histogram tree
# method, running on the CPU
params = dict(
    objective="reg:squarederror",
    tree_method="hist",
    device="cpu",
)

# Boosting-round count; a starting value that may be changed during tuning
num_boost_round = 100

# Fit the booster on the training matrix
model = xgb.train(params, dtrain, num_boost_round=num_boost_round)

# Predict on the test matrix, then report the root of the mean squared error
preds = model.predict(dtest)
rmse = np.sqrt(
    mean_squared_error(y_true=y_test, y_pred=preds)
)
print(f"RMSE of the base model: {rmse:.3f}")